DACSS 604 Project

Author

Rubi Gonzalez

Loading and Cleaning data

# necessary libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(dplyr)
library(ggplot2)
library(pollster)
library(knitr)
library(corrplot)
corrplot 0.92 loaded
library(Hmisc)

Attaching package: 'Hmisc'

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units
library(nnet)
library(stargazer)

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(MASS)

Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select
library(broom)
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:MASS':

    select

The following object is masked from 'package:Hmisc':

    subplot

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
# uploading data
ccamData <- read_sav("CCAMData2023.sav")
# uploading data
#ccamData <- read_sav("CCAMData.sav")
# creating my smaller dataset!
myData <- ccamData %>%
  dplyr::select(wave, year, happening, cause_recoded, worry, harm_personally, harm_US, harm_future_gen, harm_plants_animals, when_harm_US,
                region9, generation, educ_category) %>% 
  rename("education" = "educ_category") 

# subsetting with data for the last available wave, Oct 2023
myData <- subset(myData, wave == 29)
# making variables numeric or factors
myData$wave <- as.numeric(myData$wave)
myData$happening <- as.factor(myData$happening)
myData$cause_recoded <- as.factor(myData$cause_recoded)
myData$generation <- as.factor(myData$generation)
myData$education <- as.factor(myData$education)
myData$worry <- as.numeric(myData$worry)
myData$harm_personally <- as.factor(myData$harm_personally)
myData$harm_US <- as.factor(myData$harm_US)
myData$harm_future_gen <- as.factor(myData$harm_future_gen)
myData$harm_plants_animals <- as.factor(myData$harm_plants_animals)
myData$when_harm_US <- as.factor(myData$when_harm_US)
myData$region9 <- as.factor(myData$region9)
# recoding variables
myData <- myData %>%
  mutate(#year = recode(year, "13" = "2021"),
         
         wave = recode(wave, "29" = "Oct 2023"),
         
         cause_recoded = recode(cause_recoded, "3" = "CC_not_happening", "4" = "Nat_changes", "5" = "Human_act_and_nat_changes",
                                "6" = "human_act"),
         
         happening = recode(happening, "1" = "No", "3" = "Yes"),
         
         harm_personally = recode(harm_personally, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
         
         harm_US = recode(harm_US, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
         
         harm_future_gen = recode(harm_future_gen, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
         
         harm_plants_animals = recode(harm_plants_animals, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount",
                                      "4" = "A great deal"),
         
         when_harm_US = recode(when_harm_US, "1" = "Never", "2" = "one_hun_years", "3" = "fifty_years", "4" = "twn_five_years",
                               "5" = "ten_years", "6" = "right_now"),
         
         region9 = recode(region9, "1" = "New England", "2" = " Mid_Atlantic", "3" = "EN_Central", "4" = "WN_Central", "5" = "S_Atlantic",
                          "6" = "ES_Central", "7" = "WS_Central", "8" = "Mountain", "9" = "Pacific"),

         
         generation = recode(generation, "1" = "GenZ", "2" = "Millennials", "3" = "GenerationX", "4" = "BabyBoomers", "5" = "Silent",
                             "6" = "Greatest"),
         
         education = recode(education, "1" = "lessHighSchool", "2" = "highSchool", "3" = "someCollege", "4" = "bachelorDegreeUp"))
# getting rid of data where people refused to answer (-1) or said they "don't know" (0)
myData = filter(myData, !(cause_recoded %in% c("-1", "1", "2")))
myData = filter(myData, worry != "-1")
myData = filter(myData, !(happening %in% c("-1", "2"))) # 2 is "don't know"
myData = filter(myData, !(harm_personally %in% c("-1", "0")))
myData = filter(myData, !(harm_US %in% c("-1", "0")))
myData = filter(myData, !(harm_future_gen %in% c("-1", "0")))
myData = filter(myData, !(harm_plants_animals %in% c("-1", "0")))
myData = filter(myData, generation != "Greatest") # get rid of Greatest because they only had 1 respondent in 2021
myData = filter(myData, when_harm_US != "-1")
# checking dataset
head(myData, 20)
# A tibble: 20 × 13
   wave     year      happening cause_recoded      worry harm_personally harm_US
   <chr>    <dbl+lbl> <fct>     <fct>              <dbl> <fct>           <fct>  
 1 Oct 2023 15 [2023] Yes       human_act              4 A great deal    A grea…
 2 Oct 2023 15 [2023] Yes       Nat_changes            3 A moderate amo… A grea…
 3 Oct 2023 15 [2023] Yes       human_act              3 A great deal    A grea…
 4 Oct 2023 15 [2023] Yes       Nat_changes            2 A moderate amo… A grea…
 5 Oct 2023 15 [2023] No        Nat_changes            1 Not at all      Not at…
 6 Oct 2023 15 [2023] No        CC_not_happening       1 Not at all      Not at…
 7 Oct 2023 15 [2023] No        Nat_changes            1 Not at all      A mode…
 8 Oct 2023 15 [2023] Yes       human_act              4 A moderate amo… A mode…
 9 Oct 2023 15 [2023] Yes       human_act              3 A moderate amo… A mode…
10 Oct 2023 15 [2023] Yes       human_act              3 A great deal    A grea…
11 Oct 2023 15 [2023] Yes       human_act              3 A great deal    A grea…
12 Oct 2023 15 [2023] Yes       Nat_changes            4 A great deal    A grea…
13 Oct 2023 15 [2023] Yes       Nat_changes            3 Only a little   Only a…
14 Oct 2023 15 [2023] Yes       human_act              3 A moderate amo… A mode…
15 Oct 2023 15 [2023] Yes       human_act              3 A great deal    A grea…
16 Oct 2023 15 [2023] Yes       Nat_changes            3 A moderate amo… A mode…
17 Oct 2023 15 [2023] Yes       Nat_changes            3 A moderate amo… A mode…
18 Oct 2023 15 [2023] No        human_act              2 Only a little   A mode…
19 Oct 2023 15 [2023] Yes       Nat_changes            3 A great deal    A grea…
20 Oct 2023 15 [2023] Yes       Human_act_and_nat…     2 Not at all      Not at…
# ℹ 6 more variables: harm_future_gen <fct>, harm_plants_animals <fct>,
#   when_harm_US <fct>, region9 <fct>, generation <fct>, education <fct>
# creating the reference variable now
myData$happening <- relevel(myData$happening, ref = "No")
myData$education <- relevel(myData$education, ref = "lessHighSchool")
myData$cause_recoded <- relevel(myData$cause_recoded, ref = "Human_act_and_nat_changes")
myData$harm_personally <- relevel(myData$harm_personally, ref = "Not at all")
myData$harm_US <- relevel(myData$harm_US, ref = "Not at all")
myData$harm_future_gen <- relevel(myData$harm_future_gen, ref = "Not at all")
myData$harm_plants_animals <- relevel(myData$harm_plants_animals, ref = "Not at all")
myData$when_harm_US <- relevel(myData$when_harm_US, ref = "Never")

Creating Subsets of the Data (there are two)

sub_hap <- myData

# only getting people you believe climate change is happening
sub_hap <- filter(sub_hap, happening != "No")

# sub_not_hap <- myData
# 
# sub_not_hap <- filter(sub_not_hap, happening != "Yes")
# checking dataset
#head(sub_hap)
#head(sub_not_hap)
# a subset from the previous subset
## a subset of people who believe climate change is happening and who think human activity is contributing to climate change in some way

#sub_cause_hum <- sub_hap
sub_cause_hum <- myData

sub_cause_hum <- filter(sub_cause_hum, !(cause_recoded %in% c("CC_not_happening", "Nat_changes")))

# sub_cause_nohum <- sub_hap
# sub_cause_nohum <- myData
# 
# 
# sub_cause_nohum <- filter(sub_cause_nohum, !(cause_recoded %in% c("Human_act_and_nat_changes", "human_act")))

#head(sub_cause)
# head(sub_cause_nohum)
sub_cause_hap <- sub_hap

sub_cause_hap <- filter(sub_cause_hap, !(cause_recoded %in% c("CC_not_happening", "Nat_changes")))

Visualization of Data

US Map

This is a work in progress right now…

graph_map <- myData %>%
  group_by(region9, worry, harm_personally, harm_US, harm_future_gen, harm_plants_animals, when_harm_US) %>%
  mutate(region9 = recode(region9,
                        "1" = "New England",
                        "2" = "Mid-Atlantic",
                        "3" = "East-North Central",
                        "4" = "West-North Central",
                        "5" = "South Atlantic",
                        "6" = "East-South Central",
                        "7" = "West-South Central", 
                        "8" = "Mountain",
                        "9" = "Pacific"))

graph_map <- graph_map %>%
  group_by(region9) %>% 
  summarise(worry_avg = mean(worry), .groups = 'drop')

graph_map
# A tibble: 9 × 2
  region9         worry_avg
  <fct>               <dbl>
1 "New England"        3.27
2 " Mid_Atlantic"      2.80
3 "EN_Central"         2.92
4 "WN_Central"         3.12
5 "S_Atlantic"         2.82
6 "ES_Central"         2.73
7 "WS_Central"         3.01
8 "Mountain"           2.88
9 "Pacific"            2.98
library(usmap)

plot_usmap(include = .east_north_central, labels = T)

plot_usmap(include = .east_south_central, labels = T)

plot_usmap(include = .mid_atlantic, labels = T)

plot_usmap(include = .mountain, labels = T)

plot_usmap(include = .new_england, labels = T)

plot_usmap(include = .pacific, labels = T)

plot_usmap(include = .south_atlantic, labels = T)

plot_usmap(include = .west_north_central, labels = T)

plot_usmap(include = .west_south_central, labels = T)

library(sf)
Warning: package 'sf' was built under R version 4.3.3
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
usa <- st_as_sf(maps::map("state", fill=TRUE, plot =FALSE))

ggplot(usa) +
  geom_sf(color = "#2b2b2b", fill = "white", size=0.125) +
  coord_sf(crs = st_crs("+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"), datum = NA) +
  ggthemes::theme_map()

% of Worry

graph_myD <- myData %>%
  group_by(worry) %>%
  summarise(count = n(),
            percentage = round((n() / nrow(myData)) * 100), 2) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried"))

p1 <- plot_ly(data = graph_myD, x = ~ worry,
              y = ~ percentage,
              text = ~ paste(percentage, "%"),
              textposition = "inside",
              hovertext = ~paste(worry, "\n", "Percentage = ", percentage, "%"),
              hoverinfo = "text") %>%
        add_bars() %>% 
        layout(title = "Worry of Climate Change",
                xaxis = list(title = "Worry"),
                yaxis = list(title = "Percentage"))

p1

Harm types

harms <- myData %>% 
  group_by(harm_personally, harm_US, harm_future_gen, harm_plants_animals) #%>% 
  # mutate(harm_personally = recode(harm_personally, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
  #        harm_US = recode(harm_US, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
  #        harm_future_gen = recode(harm_future_gen, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount", "4" = "A great deal"),
  #        harm_plants_animals = recode(harm_plants_animals, "1" = "Not at all", "2" = "Only a little", "3" = "A moderate amount",
  #                                     "4" = "A great deal"))

#harms

harm_longer <- pivot_longer(harms,
                            cols = starts_with("harm_"),
                            names_to = "harm_type",
                            values_to = "response") %>% 
  count(harm_type, response)

response_counts <- harm_longer %>% 
  group_by(response) %>% 
  summarise(total = sum(n))

harm_longer <- harm_longer %>% 
  left_join(response_counts, by = "response") %>% 
  mutate(percentage = round((n / total) * 100), 2)

harm_longer$response <- factor(harm_longer$response,
                               levels = c("Not at all", "Only a little", "A moderate amount", "A great deal"))
p2 <- plot_ly(harm_longer, x = ~response, y = ~percentage, color = ~harm_type, type = "bar",
              barmode = "group", 
              text = ~ paste(percentage, "%"), 
              textposition = "outside",
              hovertext = ~paste(harm_type, "\n", "Percentage = ", percentage, "%"),
              hoverinfo = "text") %>%
  layout(title = "Percentage of Preceived Harm from Climate Change",
                xaxis = list(title = "Harm Level"),
                yaxis = list(title = "Percentage"))

p2
Warning: 'bar' objects don't have these attributes: 'barmode'
Valid attributes include:
'_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Warning: 'bar' objects don't have these attributes: 'barmode'
Valid attributes include:
'_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Warning: 'bar' objects don't have these attributes: 'barmode'
Valid attributes include:
'_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Warning: 'bar' objects don't have these attributes: 'barmode'
Valid attributes include:
'_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Worry by Recorded Cause (an example for worry by some variable)

wHappening <- myData %>%
  group_by(happening) %>% 
  mutate(happening_count = n()) %>% 
  group_by(happening, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(happening_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'happening'. You can override using the
`.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wHappening <- data.frame(wHappening)
wHappening$happening <- factor(wHappening$happening,
                   levels = c("No", "Yes"))

wHappening$worry <- factor(wHappening$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wHappening, aes(x = happening, y = percentage)) +
  geom_bar(stat = "identity", fill = "#AE445A") +
  labs(title = "Worry of Climate Change by Belief of Climate Change Happening", x = "Happening", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wCause <- myData %>%
  group_by(cause_recoded) %>% 
  mutate(cause_recoded = recode(cause_recoded, 
                                "CC_not_happening" = "Climate Change Not Happening",
                                "Nat_changes" = "Natural Change",
                                "Human_act_and_nat_changes" = "Human Activities and Natural Change",
                                "human_act" = "Human Activities")) %>% 
  mutate(cause_count = n()) %>% 
  group_by(cause_recoded, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(cause_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'cause_recoded'. You can override using the
`.groups` argument.
wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wCause <- data.frame(wCause)

#wCause
wCause$cause_recoded <- factor(wCause$cause_recoded,
                   levels = c("Climate Change Not Happening", "Natural Change", "Human Activities and Natural Change", "Human Activities"))

wCause$worry <- factor(wCause$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wCause, aes(x = cause_recoded, y = percentage)) +
  geom_bar(stat = "identity", fill = "#023C5D") +
  labs(title = "Worry of Climate Change by Recorded Cause", x = "Recorded Cause", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wPersonally <- myData %>%
  group_by(harm_personally) %>% 
  mutate(per_count = n()) %>% 
  group_by(harm_personally, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(per_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'harm_personally'. You can override using
the `.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wPersonally <- data.frame(wPersonally)
wPersonally$harm_personally <- factor(wPersonally$harm_personally,
                   levels = c("Not at all", "Only a little", "A moderate amount", "A great deal"))

wPersonally$worry <- factor(wPersonally$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wPersonally, aes(x = harm_personally, y = percentage)) +
  geom_bar(stat = "identity", fill = "#3B1E54") +
  labs(title = "Worry of Climate Change by Belief of Personal Harm", x = "Personal Harm", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wUS <- myData %>%
  group_by(harm_US) %>% 
  mutate(US_count = n()) %>% 
  group_by(harm_US, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(US_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'harm_US'. You can override using the
`.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wUS <- data.frame(wUS)
wUS$harm_US <- factor(wUS$harm_US,
                   levels = c("Not at all", "Only a little", "A moderate amount", "A great deal"))

wUS$worry <- factor(wUS$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wUS, aes(x = harm_US, y = percentage)) +
  geom_bar(stat = "identity", fill = "#FF2929") +
  labs(title = "Worry of Climate Change by Belief of Harm to US", x = "Harm to US", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wFuture <- myData %>%
  group_by(harm_future_gen) %>% 
  mutate(future_count = n()) %>% 
  group_by(harm_future_gen, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(future_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'harm_future_gen'. You can override using
the `.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wFuture <- data.frame(wFuture)
wFuture$harm_future_gen <- factor(wFuture$harm_future_gen,
                   levels = c("Not at all", "Only a little", "A moderate amount", "A great deal"))

wFuture$worry <- factor(wFuture$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wFuture, aes(x = harm_future_gen, y = percentage)) +
  geom_bar(stat = "identity", fill = "#FAB12F") +
  labs(title = "Worry of Climate Change by Belief of Harm to Future Gens", x = "Harm to Future Gens", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wPA <- myData %>%
  group_by(harm_plants_animals) %>% 
  mutate(PA_count = n()) %>% 
  group_by(harm_plants_animals, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(PA_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'harm_plants_animals'. You can override
using the `.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wPA <- data.frame(wPA)
wPA$harm_plants_animals <- factor(wPA$harm_plants_animals,
                   levels = c("Not at all", "Only a little", "A moderate amount", "A great deal"))

wPA$worry <- factor(wPA$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wPA, aes(x = harm_plants_animals, y = percentage)) +
  geom_bar(stat = "identity", fill = "#54473F") +
  labs(title = "Worry of Climate Change by Belief of Harm to Plants & Animals", x = "Harm to Plants & Animals", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p
wWhen <- myData %>%
  group_by(when_harm_US) %>% 
  mutate(when_count = n()) %>% 
  group_by(when_harm_US, worry) %>% 
  mutate(worry = recode(worry,
                        "1" = "Not at all worried",
                        "2" = "Not very worried",
                        "3" = "Somewhat worried",
                        "4" = "Very worried")) %>% 
  summarise(count = n(),
            percentage = round((count / first(when_count)) * 100)) %>% 
  ungroup()
`summarise()` has grouped output by 'when_harm_US'. You can override using the
`.groups` argument.
#wCause[nrow(wCause) + 1,] <- list("Climate Change Not Happening", "Very worried", 0, 0)

wWhen <- data.frame(wWhen)
wWhen$when_harm_US <- factor(wWhen$when_harm_US,
                   levels = c("Never", "one_hun_years", "fifty_years", "twn_five_years", "ten_years", "right_now"))

wWhen <- wWhen %>% 
  mutate(when_harm_US = recode(when_harm_US, "one_hun_years" = "100 years", "fifty_years" = "50 years", "twn_five_years" = "25 years",
                               "ten_years" = "10 years", "right_now" = "Right Now"))

wWhen$worry <- factor(wWhen$worry,
                   levels = c("Not at all worried", "Not very worried", "Somewhat worried", "Very worried"))


g <- ggplot(wWhen, aes(x = when_harm_US, y = percentage)) +
  geom_bar(stat = "identity", fill = "#FF885B") +
  labs(title = "Worry of Climate Change by When it will Harm the US", x = "When it will Harm the US", y = "Percentage") +
  theme_bw() +
  facet_wrap(~worry) +
  coord_flip()

p <- ggplotly(g)

p

Exploring different types of heat maps (there are 3)

heat_data <- myData %>% 
  mutate(cause_recoded = recode(cause_recoded, "CC_not_happening" = 1, "Nat_changes" = 2, "Human_act_and_nat_changes" = 3,
                                "human_act" = 4),
         
         happening = recode(happening, "No" = 1, "Yes" = 2),
         
         harm_personally = recode(harm_personally, "Not at all" = 1, "Only a little" = 2, "A moderate amount" = 3, "A great deal" = 4),
         
         harm_US = recode(harm_US, "Not at all" = 1, "Only a little" = 2, "A moderate amount" = 3, "A great deal" = 4),
         
         harm_future_gen = recode(harm_future_gen, "Not at all" = 1, "Only a little" = 2, "A moderate amount" = 3, "A great deal" = 4),
         
         harm_plants_animals = recode(harm_plants_animals, "Not at all" = 1, "Only a little" = 2, "A moderate amount" = 3, "A great deal" = 4),
         
         when_harm_US = recode(when_harm_US, "Never" = 1, "one_hun_years" = 2, "fifty_years" = 3, "twn_five_years" = 4,
                               "ten_years" = 5, "right_now" = 6))

heat_data <- heat_data %>% dplyr::select(worry, cause_recoded, happening, harm_personally, harm_US, harm_future_gen, harm_plants_animals,
                                         when_harm_US)
library(ggcorrplot)
library(reshape2)

Attaching package: 'reshape2'
The following object is masked from 'package:tidyr':

    smiths
variable_order <- c("worry", setdiff(names(heat_data), "worry"))

heat_data <- heat_data[, variable_order]

corr_mat <- round(cor(heat_data), 2)

p_mat <- cor_pmat(heat_data)

corr_mat <- ggcorrplot(
  corr_mat, type = "lower",
  outline.color = "white",
  p.mat = p_mat,
  lab = F) +
  labs(title = "Correlation plot of all variables", x = "Variable 1", y = "Variable 2")
pl <- corr_mat + scale_fill_gradient2(limit = c(0.5,1), low = "blue", high =  "darkred", mid = "pink", midpoint = 0.75)
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
ggplotly(pl)
# ggplotly(corr_mat)

Tables

# testing my hypothesis with the ordinal logit regression 
fitData1 <- myData %>%
  mutate(worry = recode(worry, "1" = "Not at all worried", "2" = "Not very worried", "3" = "Somewhat worried", "4" = "Very worried"))

fitData1$worry <- as.factor(fitData1$worry)
# testing my hypothesis with the ordinal logit regression 
fitData2 <- sub_hap %>%
  mutate(worry = recode(worry, "1" = "Not at all worried", "2" = "Not very worried", "3" = "Somewhat worried", "4" = "Very worried"))

fitData2$worry <- as.factor(fitData2$worry)
# testing my hypothesis with the ordinal logit regression 
fitData3 <- sub_cause_hum %>%
  mutate(worry = recode(worry, "1" = "Not at all worried", "2" = "Not very worried", "3" = "Somewhat worried", "4" = "Very worried"))

fitData3$worry <- as.factor(fitData3$worry)
# testing my hypothesis with the ordinal logit regression 
fitData4 <- sub_cause_hap %>%
  mutate(worry = recode(worry, "1" = "Not at all worried", "2" = "Not very worried", "3" = "Somewhat worried", "4" = "Very worried"))

fitData4$worry <- as.factor(fitData4$worry)
# Ordinal logit model for all models

fitwM1 <- polr(worry ~ happening + cause_recoded + harm_personally + harm_US + harm_future_gen + harm_plants_animals + when_harm_US +
                 generation + education, data = fitData1, Hess = T)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in polr(worry ~ happening + cause_recoded + harm_personally + harm_US +
: design appears to be rank-deficient, so dropping some coefs
fitwM2 <- polr(worry ~ happening + cause_recoded + harm_personally + harm_US + harm_future_gen + harm_plants_animals + when_harm_US +
                 generation + education, data = fitData2, Hess = T)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in polr(worry ~ happening + cause_recoded + harm_personally + harm_US +
: design appears to be rank-deficient, so dropping some coefs
fitwM3 <- polr(worry ~ happening + cause_recoded + harm_personally + harm_US + harm_future_gen + harm_plants_animals + when_harm_US +
                 generation + education, data = fitData3, Hess = T)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in polr(worry ~ happening + cause_recoded + harm_personally + harm_US +
: design appears to be rank-deficient, so dropping some coefs
fitwM4 <- polr(worry ~ happening + cause_recoded + harm_personally + harm_US + harm_future_gen + harm_plants_animals + when_harm_US +
                 generation + education, data = fitData4, Hess = T)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in polr(worry ~ happening + cause_recoded + harm_personally + harm_US +
: design appears to be rank-deficient, so dropping some coefs
myModels <- list(fitwM1, fitwM2, fitwM3, fitwM4)

stargazer(myModels, align = TRUE, type = "text", report = ('vc*p'), column.sep.width = "-1pt",  no.space = TRUE, title = "Ordered Logistic Regression Results", column.labels = c("Model 1","Happening", "Human Involvemnt", "Hap and HI"), omit = c("cause_recodedCC_not_happening",
                                                                                           "when_harm_USfifty_years",
                                                                                           "generation", "when_harm_UStwn_five_years",
                                                                                             "education"))

Ordered Logistic Regression Results
=========================================================================================
                                                     Dependent variable:                 
                                     ----------------------------------------------------
                                                            worry                        
                                       Model 1    Happening  Human Involvemnt Hap and HI 
                                         (1)         (2)           (3)            (4)    
-----------------------------------------------------------------------------------------
happeningYes                            0.379                    2.241***                
                                      p = 0.297                 p = 0.008                
cause_recodedNat_changes              -0.858**    -0.957**                               
                                      p = 0.024   p = 0.016                              
cause_recodedhuman_act                  0.139       0.252         0.300          0.377   
                                      p = 0.686   p = 0.476     p = 0.406      p = 0.301 
harm_personallyOnly a little          1.395***    1.573***       2.122***      2.328***  
                                     p = 0.0004   p = 0.001     p = 0.002      p = 0.001 
harm_personallyA moderate amount      2.049***    2.174***       2.565***      2.769***  
                                     p = 0.00001 p = 0.00002    p = 0.0002    p = 0.0001 
harm_personallyA great deal           2.595***    2.731***       3.124***      3.306***  
                                     p = 0.00000 p = 0.00000   p = 0.00001    p = 0.00001
harm_USOnly a little                   -0.048      -0.237         -0.953        -1.435   
                                      p = 0.920   p = 0.718     p = 0.351      p = 0.195 
harm_USA moderate amount                0.292       0.219         -0.288        -0.775   
                                      p = 0.600   p = 0.760     p = 0.780      p = 0.488 
harm_USA great deal                    1.502**     1.449*         0.818          0.315   
                                      p = 0.012   p = 0.053     p = 0.436      p = 0.782 
harm_future_genOnly a little           0.982*      -0.429         0.637          0.718   
                                      p = 0.071   p = 0.662     p = 0.652      p = 0.696 
harm_future_genA moderate amount       1.576**      0.581         1.314          1.408   
                                      p = 0.019   p = 0.589     p = 0.398      p = 0.489 
harm_future_genA great deal           2.160***      1.034         1.846          1.916   
                                      p = 0.003   p = 0.347     p = 0.235      p = 0.345 
harm_plants_animalsOnly a little        0.509       1.498        3.388**        3.409*   
                                      p = 0.341   p = 0.157     p = 0.047      p = 0.100 
harm_plants_animalsA moderate amount   1.107*      1.926*         3.093*         2.810   
                                      p = 0.100   p = 0.092     p = 0.079      p = 0.196 
harm_plants_animalsA great deal        1.708**     2.565**       3.832**         3.574   
                                      p = 0.017   p = 0.029     p = 0.030      p = 0.101 
when_harm_USone_hun_years               0.416      1.515**       3.568***       3.777**  
                                      p = 0.348   p = 0.035     p = 0.008      p = 0.026 
when_harm_USten_years                  1.303**     1.887**       4.162***       4.341**  
                                      p = 0.014   p = 0.011     p = 0.003      p = 0.012 
when_harm_USright_now                 2.024***    2.506***       4.887***      5.005***  
                                     p = 0.00005 p = 0.0005     p = 0.0004     p = 0.004 
-----------------------------------------------------------------------------------------
Observations                             818         678           560            546    
=========================================================================================
Note:                                                         *p<0.1; **p<0.05; ***p<0.01
heat_data$happening_scale <- scale(heat_data$happening)
heat_data$cause_recoded_scale <- scale(heat_data$cause_recoded)
heat_data$harm_personally_scale <- scale(heat_data$harm_personally)
heat_data$harm_US_scale <- scale(heat_data$harm_US)
heat_data$harm_future_gen_scale <- scale(heat_data$harm_future_gen)
heat_data$harm_plants_animals_scale <- scale(heat_data$harm_plants_animals)
heat_data$when_harm_US_scale <- scale(heat_data$when_harm_US)

fitData_stan <- heat_data %>%
  mutate(worry = recode(worry, "1" = "Not at all worried", "2" = "Not very worried", "3" = "Somewhat worried", "4" = "Very worried"))

fitData_stan$worry <- as.factor(fitData_stan$worry)


stan_model <- polr(worry ~ happening + cause_recoded + harm_personally + harm_US + harm_future_gen + harm_plants_animals + when_harm_US,
                   data = fitData_stan, Hess = T)

myModels <- list(stan_model)

stargazer(myModels, align = TRUE, type = "text", report = ('vc*p'), column.sep.width = "-1pt",  no.space = TRUE, title = "Ordered Logistic Regression Results", column.labels = c("Standardized"), omit = c("cause_recodedCC_not_happening",
                                                                                           "when_harm_USfifty_years", 
                                                                                           "generation", "when_harm_UStwn_five_years",
                                                                                             "education"))

Ordered Logistic Regression Results
===============================================
                        Dependent variable:    
                    ---------------------------
                               worry           
                           Standardized        
-----------------------------------------------
happening                      0.304           
                             p = 0.373         
cause_recoded                0.486***          
                            p = 0.00001        
harm_personally              0.744***          
                            p = 0.00000        
harm_US                      0.730***          
                            p = 0.00001        
harm_future_gen               0.530**          
                             p = 0.011         
harm_plants_animals           0.489**          
                             p = 0.020         
when_harm_US                 0.515***          
                             p = 0.000         
-----------------------------------------------
Observations                    818            
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01

Model fit

# AIC Model 1
glance(fitwM1)$AIC
[1] 1135.178
# AIC Model 2
glance(fitwM2)$AIC
[1] 972.0797
glance(fitwM3)$AIC
[1] 769.4636
glance(fitwM4)$AIC
[1] 755.8018
# BIC Model 1
glance(fitwM1)$BIC
[1] 1281.091
# BIC Model 2
glance(fitwM2)$BIC
[1] 1103.135
glance(fitwM3)$BIC
[1] 894.9738
glance(fitwM4)$BIC
[1] 876.2751

Interpreting my Model (Model #2)

# making a nice table for my model

modelTable <- coef(summary(fitwM4))

pValue <- pnorm(abs(modelTable[, "t value"]), lower.tail = F) * 2

modelTable <- cbind(modelTable, "p value" = pValue)

stargazer(modelTable, type = "text", title = "My Final Model: Model #2 Table")

My Final Model: Model #2 Table
======================================================================
                                     Value  Std. Error t value p value
----------------------------------------------------------------------
cause_recodedhuman_act               0.377    0.364     1.036   0.300 
harm_personallyOnly a little         2.328    0.696     3.343   0.001 
harm_personallyA moderate amount     2.769    0.710     3.900  0.0001 
harm_personallyA great deal          3.306    0.747     4.427  0.00001
harm_USOnly a little                 -1.435   1.105    -1.298   0.194 
harm_USA moderate amount             -0.775   1.116    -0.694   0.487 
harm_USA great deal                  0.315    1.135     0.277   0.781 
harm_future_genOnly a little         0.718    1.834     0.391   0.695 
harm_future_genA moderate amount     1.408    2.032     0.693   0.488 
harm_future_genA great deal          1.916    2.028     0.945   0.345 
harm_plants_animalsOnly a little     3.409    2.067     1.649   0.099 
harm_plants_animalsA moderate amount 2.810    2.172     1.294   0.196 
harm_plants_animalsA great deal      3.574    2.179     1.640   0.101 
when_harm_USone_hun_years            3.777    1.692     2.232   0.026 
when_harm_USfifty_years              2.440    1.711     1.426   0.154 
when_harm_UStwn_five_years           2.998    1.698     1.765   0.078 
when_harm_USten_years                4.341    1.712     2.535   0.011 
when_harm_USright_now                5.005    1.694     2.955   0.003 
generationMillennials                0.389    0.373     1.043   0.297 
generationGenerationX                0.281    0.375     0.748   0.454 
generationBabyBoomers                0.231    0.365     0.632   0.527 
generationSilent                     -0.277   0.495    -0.560   0.575 
educationhighSchool                  0.500    0.527     0.948   0.343 
educationsomeCollege                 0.581    0.523     1.112   0.266 
educationbachelorDegreeUp            0.524    0.502     1.043   0.297 
Not at all worried| Not very worried 4.406    2.033     2.168   0.030 
Not very worried| Somewhat worried   9.259    2.589     3.577  0.0003 
Somewhat worried| Very worried       13.586   2.620     5.185  0.00000
----------------------------------------------------------------------

Odds-Ratio

# Getting odds-ratio for model 3 

coeff <- round(exp(coef(fitwM2)), 3)

sig_var <- grep("personally|when", names(coeff), value = T)

sig_coeff <- coeff[sig_var]

coeff_df <- data.frame(variable = sig_var,
                     or = sig_coeff)
coeff_wdf <- coeff_df %>% 
  pivot_wider(names_from = variable, values_from = or)
#coeff_wdf

personal <- coeff_wdf %>% 
  select(contains('harm_personally')) %>% 
  rename('Only a little' = 'harm_personallyOnly a little', "A moderate amount" = 'harm_personallyA moderate amount',
         'A great deal' = 'harm_personallyA great deal') %>% 
  pivot_longer(cols = c('Only a little', 'A moderate amount', 'A great deal'),
               names_to = 'Harm',
               values_to = 'OR')
personal
# A tibble: 3 × 2
  Harm                 OR
  <chr>             <dbl>
1 Only a little      4.82
2 A moderate amount  8.80
3 A great deal      15.3 
when <- coeff_wdf %>% 
  select(contains('when_harm_US')) %>% 
  rename('Hundred years' = when_harm_USone_hun_years, 'Fifty years' = when_harm_USfifty_years, 'Twenty-five years' = when_harm_UStwn_five_years,
         'Ten years' = when_harm_USten_years, 'Right Now' = when_harm_USright_now) %>% 
  pivot_longer(cols = c('Hundred years', 'Fifty years', 'Twenty-five years', 'Ten years', 'Right Now'),
               names_to = 'Timing_of_CC_Harm',
               values_to = 'OR')
when
# A tibble: 5 × 2
  Timing_of_CC_Harm    OR
  <chr>             <dbl>
1 Hundred years      4.55
2 Fifty years        1.46
3 Twenty-five years  2.25
4 Ten years          6.60
5 Right Now         12.3 
when$Timing_of_CC_Harm <- factor(when$Timing_of_CC_Harm,
                   levels = c("Right Now", "Ten years", "Twenty-five years", "Fifty years", "Hundred years"))
per_or_bar <- plot_ly(data = personal,
                  x = ~ Harm,
                  y = ~ OR,
                  type = "bar",
                  text = ~ paste(OR), 
                  textposition = "outside",
                  hovertext = ~paste(Harm, "\n", "Odds-Ratio = ", OR),
                  hoverinfo = "text",
                  marker = list(color = "lightblue")) %>% 
  layout(title = "Odds-ratio for Predictors of Climate Change Worry",
         xaxis = list(title = "Variables"),
         yaxis = list(title = "Odds Ratio"))

per_or_bar
per_or_bar <- plot_ly(data = when,
                  x = ~ Timing_of_CC_Harm,
                  y = ~ OR,
                  type = "bar",
                  text = ~ paste(OR), 
                  textposition = "outside",
                  hovertext = ~paste(Timing_of_CC_Harm, "\n", "Odds-Ratio = ", OR),
                  hoverinfo = "text",
                  marker = list(color = "purple")) %>% 
  layout(title = "Odds-ratio for Predictors of Climate Change Worry",
         xaxis = list(title = "Variables"),
         yaxis = list(title = "Odds Ratio"))

per_or_bar